***************************************************;
***table1.sas                                   ***;
***************************************************;
***This information is used in Table 1 of the   ***;
***Davis, Grim, Haltiwanger, and Streitwieser   ***;
***2013 RESTAT paper.                           ***;
***************************************************;
***Output:                                      ***;
*** table1.lst                                  ***;
***************************************************;

options ls=80 obs=max;

****START MACRO CODE****;

*Macro to calculate fuzzed percentiles;

%macro d1(wt, var, wtvar);

proc univariate data=gs6300 noprint round = 0.001 vardef=wdf;
	var &var;
	freq &wt;
	output out=stat_&wtvar mean=mean std=std
	pctlpre = p 
	pctlpts = 0 1 2 4 5 6 9 10 11 24 25 26 49 50 51 74 75 76 
		89 90 91 94 95 96 98 99 100;

proc print data=stat_&wtvar;
	title1 "&wtvar Weighted: &var Stats";

data stat_&wtvar;
	set stat_&wtvar;
	mgit = 1;

proc sort data=stat_&wtvar;
	by mgit;

data stat_&wtvar;
	merge gs6300 stat_&wtvar;
	by mgit;

	mark = .;

	if &var ge p0 and &var le p2 then mark = 1;
	else if &var ge p4 and &var le p6 then mark = 5;
	else if &var ge p9 and &var le p11 then mark = 10;
	else if &var ge p24 and &var le p26 then mark = 25;
	else if &var ge p49 and &var le p51 then mark = 50;
	else if &var ge p74 and &var le p76 then mark = 75;
	else if &var ge p89 and &var le p91 then mark = 90;
	else if &var ge p94 and &var le p96 then mark = 95;
	else if &var ge p98 and &var le p100 then mark = 99;

proc sort data=stat_&wtvar;
	by mark;

proc summary data=stat_&wtvar;
	by mark;
	weight &wt;
	output out=fuzz_&wtvar
	mean(&var) = fuzz_&var;

proc print data=fuzz_&wtvar;
	var mark fuzz_&var;
	title1 "Fuzzed &wtvar Weighted: &var Percentiles";

proc datasets lib=work;
	delete stat_&wtvar fuzz_&wtvar;
run;
quit;

%mend d1;


****START PROGRAM****;

data gs6300;
	set elec.gs6300 (keep = ppn year fipsst cyfstn bigutil newind ww pe wt100 wtpe wttvs ee tvs sw wt lpe);
	uno = 1;

*Number of observations by year;

proc freq data=gs6300;
	tables year;
	title1 "Number of Observations by Year";

*Count the number of states;

proc sort data=gs6300 nodupkey out=st;
	by fipsst;

proc summary data=st;
	output out=ct_st
	sum(uno) = num_states
	;

proc print data=ct_st;
	var num_states;
	title1 "Number of States";

*Count the number of counties;

proc sort data=gs6300 nodupkey out=cou;
	by cyfstn;

proc summary data=cou;
	output out=ct_cou
	sum(uno) = num_counties
	;

proc print data=ct_cou;
	var num_counties;
	title1 "Number of Counties";

*Count the number of utilities;

proc sort data=gs6300 nodupkey out=bu;
	by bigutil;

proc summary data=bu;
	output out=ct_bu
	sum(uno) = num_bigutil
	;

proc print data=ct_bu;
	var num_bigutil;
	title1 "Number of Utilities";

*Count the number of industries (4-digit SIC);

data gs72;
	set gs6300;
	if year le 1986;

data gs87;
	set gs6300;
	if year ge 1987;

proc sort data=gs72 nodupkey out=ind72;
	by newind;

proc summary data=ind72;
	output out=ct_ind72
	sum(uno) = num_ind72
	;

proc print data=ct_ind72;
	var num_ind72;
	title1 "Number of 1972 SICs";

proc sort data=gs87 nodupkey out=ind87;
	by newind;

proc summary data=ind87;
	output out=ct_ind87
	sum(uno) = num_ind87
	;

proc print data=ct_ind87;
	var num_ind87;
	title1 "Number of 1987 SICs";

*Basic statistics;

data gs6300;
	set gs6300;
	wttvs100 = wttvs * 100;
	wtpe100 = wtpe * 100;

	wttvs_short = round(wttvs, 1);
	wtpe_short = round(wtpe, 1);

	if sw ne 0 then share_ee_sw = ee / sw;	
	else share_ee_sw = 0;

	if ww ne 0 then share_ee_ww = ee / ww;	
	else share_ee_ww = 0;

	if tvs ne 0 then share_ee_tvs = ee / tvs;	
	else share_ee_tvs = 0;

	wttvs_share = wttvs / 10;
	wtpe_share = wtpe / 10;

	mgit = 1;

proc sort data=gs6300;
	by year;

*Calculate aggregate ee, tvs, sw by year - ASM sample weighted;

proc summary data=gs6300;
	by year;
	weight wt;
	var ee tvs sw;
	output out=tot_uu
	sum(ee) = tot_ee_uu
	sum(tvs) = tot_tvs_uu 
	sum(sw) = tot_sw_uu;

data tot_uu;
	set tot_uu;
	ee_tvs_share_uu = tot_ee_uu / tot_tvs_uu;
	ee_sw_share_uu = tot_ee_uu / tot_sw_uu;

*Calculate aggregate ee, tvs, sw by year - ASM sample and shipments weighted;

proc summary data=gs6300;
	by year;
	weight wttvs;
	var ee tvs sw;
	output out=tot_ww
	sum(ee) = tot_ee_ww
	sum(tvs) = tot_tvs_ww 
	sum(sw) = tot_sw_ww;

data tot_ww;
	set tot_ww;
	ee_tvs_share_ww = tot_ee_ww / tot_tvs_ww;
	ee_sw_share_ww = tot_ee_ww / tot_sw_ww;

*Calculate aggregate ee, tvs, sw by year - ASM sample weighted;

proc summary data=gs6300;
	by year;
	weight wtpe;
	var ee tvs sw;
	output out=tot_wp
	sum(ee) = tot_ee_wp
	sum(tvs) = tot_tvs_wp 
	sum(sw) = tot_sw_wp;

data tot_wp;
	set tot_wp;
	ee_tvs_share_wp = tot_ee_wp / tot_tvs_wp;
	ee_sw_share_wp = tot_ee_wp / tot_sw_wp;

data tot;
	merge tot_uu tot_wp tot_ww;
	by year;

proc print data=tot;
	var year ee_tvs_share_uu ee_sw_share_uu
		ee_tvs_share_ww ee_sw_share_ww
		ee_tvs_share_wp ee_sw_share_wp;
	title1 "Ratios by Year";

proc sort data=gs6300;
	by mgit;

*Calculate fuzzed percentiles;

%d1(wttvs, pe, tvs)
%d1(wttvs, lpe, tvs)
%d1(wttvs_share, share_ee_sw, tvs)
%d1(wttvs_share, share_ee_tvs, tvs)

%d1(wtpe, pe, pe)
%d1(wtpe, lpe, pe)
%d1(wtpe_share, share_ee_sw, pe)
%d1(wtpe_share, share_ee_tvs, pe)

%d1(wt100, pe, asm)
%d1(wt100, lpe, asm)
%d1(wt100, share_ee_sw, asm)
%d1(wt100, share_ee_tvs, asm)

run;


